Consecutive dry days#
import os
import os.path as op
import sys
import folium
import numpy as np
import pandas as pd
sys.path.append("../../../../indicators_setup")
from ind_setup.plotting_int import plot_timeseries_interactive
from ind_setup.plotting import plot_bar_probs
sys.path.append("../../../functions")
from data_downloaders import GHCN
country = 'Palau'
vars_interest = ['PRCP']
Get Data#
update_data = False
path_data = "../../../data"
if update_data:
df_country = GHCN.get_country_code(country)
print(f'The GHCN code for {country} is {df_country["Code"].values[0]}')
df_stations = GHCN.download_stations_info()
df_country_stations = df_stations[df_stations['ID'].str.startswith(df_country.Code.values[0])]
print(f'There are {df_country_stations.shape[0]} stations in {country}')
Using Koror Station#
if update_data:
GHCND_dir = 'https://www.ncei.noaa.gov/data/global-historical-climatology-network-daily/access/'
id = 'PSW00040309' # Koror Station
dict_prcp = GHCN.extract_dict_data_var(GHCND_dir, 'PRCP', df_country_stations.loc[df_country_stations['ID'] == id])[0]
data = dict_prcp[0]['data']#.dropna()
data.to_pickle(op.join(path_data, 'GHCN_precipitation.pkl'))
else:
data = pd.read_pickle(op.join(path_data, 'GHCN_precipitation.pkl'))
dict_prcp = [{'data' : data, 'var' : 'PRCP', 'ax' : 1, 'label' : 'Precipitation'},]
fig = plot_timeseries_interactive(dict_prcp, trendline=True, ylims = [None, None], figsize = (25, 12))
threshold = 1 #Threshold for dry and wet day
data = dict_prcp[0]['data']#.dropna()
data = data.groupby(data.index.year).filter(lambda x: len(x) >= 300).dropna()
data['dry_day'] = np.where(data['PRCP'] < threshold, 1, 0)
def consecutive_dry_days(series):
consec_dry = 0
max_consec_dry = 0
for value in series:
if value: # If it's a dry day (True)
consec_dry += 1
else: # If it's not a dry day (False)
max_consec_dry = max(max_consec_dry, consec_dry)
consec_dry = 0
return max_consec_dry
consecutive_dry_year = data.groupby(data.index.year)['dry_day'].apply(consecutive_dry_days)
threshold = 1
data['below_threshold'] = data['PRCP'] < threshold
# Función para calcular días consecutivos por debajo del umbral
def count_consecutive_days(series):
count = 0
result = []
for value in series:
if value:
count += 1
else:
count = 0
result.append(count)
return result
# Aplica la función
data['consecutive_days'] = count_consecutive_days(data['below_threshold'])
plot_bar_probs(np.unique(data.index.year), data.groupby(data.index.year)['consecutive_days'].mean(),
trendline =True, y_label = 'Mean consecutive dry days [< 1mm]',
figsize = [12, 5])
<Axes: ylabel='Mean consecutive dry days [< 1mm]'>
plot_bar_probs(np.unique(data.index.year), data.groupby(data.index.year)['consecutive_days'].max(),
trendline =True, y_label = 'Maximum consecutive dry days [< 1mm]',
figsize = [12, 5])
<Axes: ylabel='Maximum consecutive dry days [< 1mm]'>
datag = data.groupby(data.index.year).max()
datag.index = pd.to_datetime(datag.index, format = '%Y')
dict_plot = [{'data' : datag, 'var' : 'consecutive_days', 'ax' : 1, 'label':'Max. number of consecutive dry days [< 1mm]'},]
# Drop missing values for the specified variable
data = datag['consecutive_days'].dropna()
# Convert the time index to numerical values for fitting
time = data.index.values
time_num = time.view('int64') // 1e9 # Convert to seconds since epoch (numeric format)
# Fit a linear trendline (degree 1 polynomial)
coefficients = np.polyfit(time_num, data.values, 1) # Linear fit
trendline = np.poly1d(coefficients) # Create a polynomial function for the trendline
from scipy.stats import linregress
a, b, c, p_value, _ = linregress(time_num, data.values)
plot_timeseries_interactive(dict_plot, trendline = True, figsize = (25, 12));